Prepare environmental data from woa and others

Read raw env data, clean it and format it nicely.

Author

Thelma Panaïotis, Laetitia Drago & Jean-Olivier Irisson

source("utils.R")

Predictors overview

List

Aim to predict DOC in 3 layers:

  • surface: 0-10 m

  • mesopelagic: 200 - 1000 m

  • bathypelagic: > 1000 m

The table below details the predictors used for each layer.

Predictors set.
What From where Timescale Depth range Layers Predictor for
Temperature WOA Seasonal 0-5500
  • surface

  • meso

  • bathy

  • surface/meso/bathy

  • meso/bathy

  • bathy

Salinity WOA Seasonal 0-5500
  • surface

  • meso

  • bathy

  • surface/meso/bathy

  • meso/bathy

  • bathy

Density WOA Seasonal 0-5500?
  • surface

  • meso

  • bathy

  • surface/meso/bathy

  • meso/bathy

  • bathy

Oxygen WOA Seasonal 0-1500
  • surface

  • meso

  • bathy

  • surface/meso/bathy

  • meso/bathy

  • bathy

AOU WOA Seasonal 0-1500?
  • surface

  • meso

  • bathy

  • surface/meso/bathy

  • meso/bathy

  • bathy

Silicate WOA Seasonal 0-800
  • surface

  • meso

  • surface/meso/bathy

  • meso/bathy

Phosphate WOA Seasonal 0-800
  • surface

  • meso

  • surface/meso/bathy

  • meso/bathy

Nitrate WOA Seasonal 0-800
  • surface

  • meso

  • surface/meso/bathy

  • meso/bathy

MLD WOA Seasonal Depth value surface/meso/bathy
Thermocline To compute Seasonal Depth value surface/meso/bathy
Pycnocline To compute Seasonal Depth value surface/meso/bathy
N-cline To compute Seasonal Depth value surface/meso/bathy
P-cline To compute Seasonal Depth value surface/meso/bathy
S-cline To compute Seasonal Depth value surface/meso/bathy
Zeu Cael Monthly Depth value surface/meso/bathy
NPP Cael Monthly Surface surface/meso/bathy
BBP Cael Monthly Surface surface/meso/bathy
Fmicro Cael Monthly Surface surface/meso/bathy
log(chl) Cael Monthly Surface surface/meso/bathy
PIC Cael Monthly Surface surface/meso/bathy
φ PSD slope Cael Monthly Surface surface/meso/bathy
Irradiance Cael Monthly Surface surface/meso/bathy

Processing

We need:

  • seasonal + annual values at the surface (because surface data are also used to predict meso and bathy, with annual timescale)

  • annual values in the meso and bathy

WOA

Download

Perform only if necessary.

if (download_woa){
  # define all combinations of variables to download
  df <- read_csv(
    "var,abbrv,period
    temperature,t,A5B7
    salinity,s,A5B7
    density,I,A5B7
    mld,M02,A5B7
    AOU,A,all
    silicate,i,all
    phosphate,p,all
    nitrate,n,all
    oxygen,o,all
  ", show_col_types = FALSE)
  month <- sprintf("%02i",13:16) # seasons
  combi <- crossing(df, month)

  # define download URLs
  urls <- str_glue("https://data.nodc.noaa.gov/thredds/fileServer/ncei/woa/{var}/{period}/1.00/woa18_{period}_{abbrv}{month}_01.nc", .envir = combi)

  # and download files
  lapply(urls, function(url) {
    dest <- file.path(woa_dir, basename(url))
    if (!file.exists(dest)) { # if not previously downloaded
      message(basename(url))
      download.file(url, destfile = dest)
      Sys.sleep(10)
    }

  })
  message("Done downloading WOA data")

  # create links to the downloaded files with easier names
  ok <- file.symlink(
    from = str_glue("{woa_dir}/woa18_{period}_{abbrv}{month}_01.nc", .envir = combi),
    to = file.path(woa_loc, str_glue("{var}_{month}.nc", .envir = mutate(combi, month = as.numeric(month))))
  )
  all(ok)
}

Read

# List WOA variables
vars <- c("temperature", "salinity", "density", "mld", "oxygen", "aou", "silicate", "phosphate", "nitrate")

# Open one file to get all coordinates (lon, lat, depth)
nc <- nc_open(file.path(woa_loc, "temperature_13.nc"))
lon <- ncvar_get(nc, "lon")
lat <- ncvar_get(nc, "lat")
depth <- ncvar_get(nc, "depth")
# Get indexes of relevant depth
#depth_idx <- which(depth <= max_depth_woa)
# Limit depth to chosen max depth
#depth <- ncvar_get(nc, "depth", count = max(depth_idx))
# Number of depth values
depth_count <- length(depth)
# Close the file
nc_close(nc)


# Read all files in parallel
woa <- pbmclapply(vars, function(var) {
    # prepare storage for one variable at n depths for 4 seasons
    block <- array(NA, c(360, 180, depth_count, 4))

  for (seas in 1:4) {
    # define the file to read
    file <- str_c(woa_loc, "/", var, "_", seas + 12, ".nc")
    # open the file and read the data in it
    nc <- nc_open(file)
    # get depth count (differs between variables)
    depth_count <- length(ncvar_get(nc, "depth"))

    block[,,1:depth_count,seas] <- ncvar_get(nc, varid=names(nc$var)[6], count = c(360, 180, depth_count, 1))
    #block[,,,month] <- ncvar_get(nc, varid=names(nc$var)[6])
    nc_close(nc)
  }
  return(block)
}, mc.cores = min(length(vars), n_cores))

# Add variable names
names(woa) <- vars
str(woa)
List of 9
 $ temperature: num [1:360, 1:180, 1:102, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ salinity   : num [1:360, 1:180, 1:102, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ density    : num [1:360, 1:180, 1:102, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ mld        : num [1:360, 1:180, 1:102, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ oxygen     : num [1:360, 1:180, 1:102, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ aou        : num [1:360, 1:180, 1:102, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ silicate   : num [1:360, 1:180, 1:102, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ phosphate  : num [1:360, 1:180, 1:102, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ nitrate    : num [1:360, 1:180, 1:102, 1:4] NA NA NA NA NA NA NA NA NA NA ...

Plot surface values for winter.

image.plot(woa$temperature[,,1,1], col = col_temp, main = "Temperature")

image.plot(woa$salinity[,,1,1], col = col_sal, main = "Salinity")

image.plot(woa$density[,,1,1], col = col_dens, main = "Density")

image.plot(woa$mld[,,1,1], col = col_depth, main = "MLD")

image.plot(woa$oxygen[,,1,1], col = col_oxy, main = "Oxygen")

image.plot(woa$aou[,,1,1], col = col_aou, main = "AOU")

image.plot(woa$nitrate[,,1,1], col = col_nit, main = "Nitrate")

image.plot(woa$phosphate[,,1,1], col = col_phos, main = "Phosphate")

image.plot(woa$silicate[,,1,1], col = col_sil, main = "Silicate")

Compute clines

Thermocline

thermo <- pbmclapply(1:4, function(m) { # in parallel
  apply(woa$temperature[,,,m], c(1,2), function(temp) {
    # check number of available data points
    ok <- !is.na(temp)
    if (sum(ok) > 3) {
      # sequence of regular depth for interpolation
      depths_i <- seq(0, max_depth_woa, by = 5) 
      # interpolate temperature on 5 m steps
      temp_i <- interpolate(depth[ok], temp[ok], depths_i, method = "spline", extrapolate = FALSE)
      # compute thermocline depth
      ok <- !is.na(temp_i)
      pyc <- clined(temp_i[ok], depths_i[ok], n.smooth = 2, k = 4)
    } else {
      pyc <- NA
    }
    return(pyc)
  })
}, mc.cores = n_cores)  # looooong, even in parallel
walk(thermo, image.plot, col = col_depth)

# smooth the result to avoid local artefacts
thermo <- lapply(thermo, function(x) {
  xs <- image.smooth(x, theta = 1)$z
  return(xs)
})
walk(thermo, image.plot, col = col_depth)

# combine all months into a matrix
thermo <- do.call(abind, list(thermo, along = 3))

Pycnocline

For this, we use data down to 500 metres.

pycno <- pbmclapply(1:4, function(m) { # in parallel
  apply(woa$density[,,,m], c(1,2), function(dens) {
    # check number of available data points
    ok <- !is.na(dens)
    if (sum(ok) > 3) {
      # sequence of regular depth for interpolation
      depths_i <- seq(0, max_depth_woa, by = 5) 
      # interpolate density on 5 m steps
      dens_i <- interpolate(depth[ok], dens[ok], depths_i, method = "spline", extrapolate = FALSE)
      # compute pycnocline depth
      ok <- !is.na(dens_i)
      pyc <- clined(dens_i[ok], depths_i[ok], n.smooth = 2, k = 4)
    } else {
      pyc <- NA
    }
    return(pyc)
  })
}, mc.cores = n_cores)  # looooong, even in parallel
walk(pycno, image.plot, col = col_depth)

# smooth the result to avoid local artefacts
pycno <- lapply(pycno, function(x) {
  xs <- image.smooth(x, theta = 1)$z
  return(xs)
})
walk(pycno, image.plot, col = col_depth)

# combine all months into a matrix
pycno <- do.call(abind, list(pycno, along = 3))

Nutriclines

Let’s now compute clines for nitrates, phosphates and silicates.

Nitrate
n_cline <- pbmclapply(1:4, function(m) { # in parallel
  apply(woa$nitrate[,,,m], c(1,2), function(nit) {
    # check number of available data points
    ok <- !is.na(nit)
    if (sum(ok) > 3) {
      # sequence of regular depth for interpolation
      depths_i <- seq(0, max_depth_woa, by = 5) 
      # interpolate nitrate on 5 m steps
      nit_i <- interpolate(depth[ok], nit[ok], depths_i, method = "spline", extrapolate = FALSE)
      # compute nitracline depth
      ok <- !is.na(nit_i)
      pyc <- clined(nit_i[ok], depths_i[ok], n.smooth = 2, k = 4)
    } else {
      pyc <- NA
    }
    return(pyc)
  })
}, mc.cores=n_cores)  # looooong, even in parallel
walk(n_cline, image.plot, col = col_depth)

# smooth the result to avoid local artefacts
n_cline <- lapply(n_cline, function(x) {
  xs <- image.smooth(x, theta = 1)$z
  return(xs)
})
walk(n_cline, image.plot, col = col_depth)

# combine all months into a matrix
n_cline <- do.call(abind, list(n_cline, along = 3))
Phosphate
p_cline <- pbmclapply(1:4, function(m) { # in parallel
  apply(woa$phosphate[,,,m], c(1,2), function(phos) {
    # check number of available data points
    ok <- !is.na(phos)
    if (sum(ok) > 3) {
      # sequence of regular depth for interpolation
      depths_i <- seq(0, max_depth_woa, by = 5) 
      # interpolate phosphate on 5 m steps
      phos_i <- interpolate(depth[ok], phos[ok], depths_i, method = "spline", extrapolate = FALSE)
      # compute phosphacline depth
      ok <- !is.na(phos_i)
      pyc <- clined(phos_i[ok], depths_i[ok], n.smooth = 2, k = 4)
    } else {
      pyc <- NA
    }
    return(pyc)
  })
}, mc.cores=n_cores)  # looooong, even in parallel
walk(p_cline, image.plot, col = col_depth)

# smooth the result to avoid local artefacts
p_cline <- lapply(p_cline, function(x) {
  xs <- image.smooth(x, theta = 1)$z
  return(xs)
})
walk(p_cline, image.plot, col = col_depth)

# combine all months into a matrix
p_cline <- do.call(abind, list(p_cline, along = 3))
Silicate
s_cline <- pbmclapply(1:4, function(m) { # in parallel
  apply(woa$silicate[,,,m], c(1,2), function(sil) {
    # check number of available data points
    ok <- !is.na(sil)
    if (sum(ok) > 3) {
      # sequence of regular depth for interpolation
      depths_i <- seq(0, max_depth_woa, by = 5) 
      # interpolate silicate on 5 m steps
      sil_i <- interpolate(depth[ok], sil[ok], depths_i, method = "spline", extrapolate = FALSE)
      # compute phosphacline depth
      ok <- !is.na(sil_i)
      pyc <- clined(sil_i[ok], depths_i[ok], n.smooth = 2, k = 4)
    } else {
      pyc <- NA
    }
    return(pyc)
  })
}, mc.cores=n_cores)  # looooong, even in parallel
walk(s_cline, image.plot, col = col_depth)

# smooth the result to avoid local artefacts
s_cline <- lapply(s_cline, function(x) {
  xs <- image.smooth(x, theta = 1)$z
  return(xs)
})
walk(s_cline, image.plot, col = col_depth)

# combine all months into a matrix
s_cline <- do.call(abind, list(s_cline, along = 3))

GLODAP

For nutrients in the bathypelagic layer.

# List of files
files_gl <- list.files("data/raw/GLODAPv2.2016b_MappedClimatologies", pattern = ".nc", full.names = TRUE)
# Get files for nutrients
files_gl <- files_gl[str_detect(files_gl, "NO3|PO4|silicate")]
# List of var names from files
vars_gl <- str_extract(files_gl, "((?<=2016b\\.).+)(.(?=.nc))")

# Open one file to get dimensions
nc <- nc_open(files_gl[1])
lon_gl <- ncvar_get(nc, "lon")
rank_lon_gl <- rank(lon_gl) # rank of longitudes to reorganise
lat_gl <- ncvar_get(nc, "lat")
depth_gl <- ncvar_get(nc, "Depth")
# Get indexes of bathypelagic depths
depth_idx_gl <- which(depth_gl > 1000)
# Get corresponding depths
depth_gl <- ncvar_get(nc, "Depth", start = min(depth_idx_gl))
# Number of depth values
depth_count_gl <- length(depth_idx_gl)
# Close the file
nc_close(nc)

# Read all files in parallel
glodap <- mclapply(files_gl, function(file) {
  
  # get variable name from file
  var <- str_extract(file, "((?<=2016b\\.).+)(.(?=.nc))")
  
  # open, read, close
  nc <- nc_open(file)
  block <- ncvar_get(nc, varid = var, start = c(1, 1, min(depth_idx_gl)), count = c(360, 180, depth_count_gl))
  nc_close(nc)
  
  # reorder longitudes to centre on Greenwidch
  block <- block[c(rank_lon_gl[lon_gl > 180], rank_lon_gl[lon_gl <= 180]),,]
  
  return(block)
}, mc.cores = min(length(files_gl), n_cores))

# Add variable names, not using original names
names(glodap) <- c("nitrate", "phosphate", "silicate")

# Correct longitudes from [20.5:379.5] to [-179.5:179.5]
lon_gl <- ifelse(lon_gl > 360, lon_gl - 360, lon_gl) %>% sort()
lon_gl <- lon_gl - 180

image.plot(glodap$nitrate[,,1], col   = col_nit, main = "Nitrate top bathy")

image.plot(glodap$phosphate[,,1], col = col_phos, main = "Phosphate top bathy")

image.plot(glodap$silicate[,,1],  col = col_sil, main = "Silicate top bathy")

Average over layers

For each variable measured along depth, compute the average in

  • the surface layer: 0 to 10 m.

  • the mesopelagic layer: 200 to 1000 m

  • the bathypelagic: below 1000 m

## For WOA
# List variables to process and remove mld (it is a single value per pixel)
vars <- names(woa)
vars <- vars[vars != "mld"]

env_m <- pbmclapply(vars, function(my_var) {
  message(my_var)

  # extract variable
  X <- woa[[my_var]]

  # prepare storage
  res <- array(NA, dim(X)[-3])
  res <- list(surf = res, meso = res, bathy = res)

  # for each pixel of each month
  for (i in seq(along = lon)) {
    for (j in seq(along = lat)) {
      for (m in 1:4) {
        ## Surface data
        # compute average if 2/3 of data is present (3 values expected in 0-10 m, good to have at least 2 of them)
        depth_idx <- depth <= surface_bottom # depth indices above 10 m
        keep <- X[i, j, depth_idx, m]
        if (percent_na(keep) <= 1/3) {
          res$surf[i, j, m] <- mean(keep, na.rm = TRUE)
         # otherwise just leave the NA value
        }
        
        ## Mesopelagic
        # compute average if 80% of data is present
        depth_idx <- depth > meso_top & depth <= meso_bottom  # depth indices between 200 and 1000 m
        keep <- X[i, j, depth_idx, m]
        if (percent_na(keep) <= 0.2) {
          res$meso[i, j, m] <- mean(keep, na.rm = TRUE)
         # otherwise just leave the NA value
        }
        
        ## Bathypelagic
        # compute average if at least 3 values are present
        depth_idx <- depth > meso_bottom # depth indices deeper than 1000 m
        keep <- X[i, j, depth_idx, m]
        if (sum(!is.na(keep)) > 2) {
          res$bathy[i, j, m] <- mean(keep, na.rm = TRUE)
         # otherwise just leave the NA value
        }
      }
    }
  }
  return(res)
}, mc.cores = min(length(vars), n_cores))

# Add variable names
names(env_m) <- vars

# flatten and keep layers next to each other
env_m <- unlist(env_m, recursive = FALSE)

## Add bathy nutrients from GLODAP
# Replicate 4 times to simulate seasons, will later be averaged into seasonal
env_m$nitrate.bathy <-   replicate(4, apply(glodap$nitrate, c(1,2), mean, na.rm = TRUE)) %>% abind(along = 3)
env_m$phosphate.bathy <- replicate(4, apply(glodap$phosphate, c(1,2), mean, na.rm = TRUE)) %>% abind(along = 3)
env_m$silicate.bathy <-  replicate(4, apply(glodap$silicate, c(1,2), mean, na.rm = TRUE)) %>% abind(along = 3)

Other climatologies

Misc from satellite

Easy one: read the .mat file, reshape lon vs lat and reverse lat. The grid is already 1°×1°×12 months.

Warning

Check grid definition

# List variables available in climatology
clim <- readMat("data/raw/clim4cael.mat")
vars <- names(clim)
vars <- vars[vars != "cbpm"] # ignore cbpm, we will use cafes

clim <- pbmclapply(vars, function(my_var) {
  # Read data for variables
  data <- read_clim("data/raw/clim4cael.mat", my_var, yearly = FALSE)
  # Convert from monthly to seasonal
  data <- month_to_seas(data, to_array = TRUE)
  return(data)
}, mc.cores = n_cores)
# set nice names
names(clim) <- c("bbp", "npp", "fmicro", "log_chl", "z_eu")

## Others climatology to read: picpod and PSD_slope
pic <- read_clim("data/raw/picpoc.mat", "pic", yearly = FALSE)
clim$pic <- month_to_seas(pic, to_array = TRUE)

poc <- read_clim("data/raw/picpoc.mat", "poc", yearly = FALSE)
clim$poc <- month_to_seas(poc, to_array = TRUE)

psd <- read_clim("data/raw/PSD_slope.mat", "Xi", yearly = FALSE)
clim$psd <- month_to_seas(psd, to_array = TRUE)

# Plot seasonal maps
apply(clim$npp, 3, image.plot, col = col_npp, main = "NPP (cafe)")

NULL
apply(clim$z_eu, 3, image.plot, col = col_depth, main = "Euphotic depth")

NULL
#apply(clim$bpp, 3, image.plot, col = col_bbp, main = "BBP")
apply(clim$fmicro, 3, image.plot, col = col_fmicro, main = "Fmicro")

NULL
apply(clim$log_chl, 3, image.plot, col = col_chl, main = "log(Chl)")

NULL
apply(clim$pic, 3, image.plot, col = col_pic, main = "PIC")

NULL
apply(clim$poc, 3, image.plot, col = col_poc, main = "POC")

NULL
apply(clim$psd, 3, image.plot, col = col_psd, main = "Phyto PSD slope")

NULL

Irradiance data

Climatology data in 12 netcdf files. Raw resolution is very high and needs to be downgraded to a 1°×1° grid.

Let’s start by just reading 1 file to get the dimensions of the grid.

# Open one file to get all coordinates (lon, lat)
nc <- nc_open("data/raw/modis/AQUA_MODIS.20020701_20210731.L3m.MC.PAR.par.9km.nc")
lon_par <- ncvar_get(nc, "lon")
lat_par <- ncvar_get(nc, "lat")
lat_par <- rev(lat_par) # lat will need to be reversed
nc_close(nc)

Now let’s read all files.

# List par files
par_files <- list.files("data/raw/modis", full.names = TRUE, pattern = ".nc")
# Get months from file names, NB this is important because the order is not trivial!
par_months <- par_files %>% str_extract("[:digit:]{6}") %>% str_extract("[:digit:]{2}$")
# Reorder par files
par_files <- par_files[order(par_months)]

# Read files for each month
par <- mclapply(1:12, function(m) {
  # open nc file
  nc <- nc_open(par_files[m])
  # read par values for a given month
  par_m <- ncvar_get(nc, "par")
  # reorder latitudes
  par_m <- par_m[, ncol(par_m):1]
  # close file
  nc_close(nc)
  return(par_m)
})
# convert list to matrix
par <- do.call(abind, list(par, along = 3))

# Convert monthly to seasonal
par <- month_to_seas(par, to_array = TRUE)

PAR values are stored in a 4320 by 2160 by 4array. To downgrade the grid, we first need to convert this data to a dataframe. Let’s process in parallel by season. For each season, floor lon and lat to 1° and add 0.5 to get the center of every pixel of the 1°×1° grid, then average PAR value per grid cell.

# add dimension names
dimnames(par) <- list(lon_par, lat_par, 1:4)

# melt to dataframe
df_par <- reshape2::melt(par) %>%
  as_tibble() %>%
  rename(lon = Var1, lat = Var2, season = Var3, par = value)
  
# Round lon and lat to 1 degree and average per grid cell
df_par <- df_par %>%
  mutate(
    lon = roundp(lon, precision = 1, f = floor) + 0.5,
    lat = roundp(lat, precision = 1, f = floor) + 0.5
  ) %>%
  group_by(lon, lat, season) %>%
  summarise(par = mean(par, na.rm = TRUE), .groups = "drop") %>%
  ungroup()

# Plot map
ggmap(df_par, "par", type = "raster") + facet_wrap(~season)

Combine all env variables

Let’s combine array formatted env variables together.

# Join layer-wise data and surface data
env_m <- c(env_m, clim)

# Add MLD
# NB: MLD data was read similarly to other WOA data, i.e. depth wise, which does not make sense for MLD because we have only 1 value per pixel. 
# Thus, let’s retain only the first depth, which contains the MLD value for each pixel
mld <- woa$mld[,,1,] # first depth
dim(mld) # lon * lat * season: perfect!
[1] 360 180   4
env_m$mld <- mld

# Add other clines
env_m$thermo <- thermo
env_m$pycno <- pycno
env_m$n_cline <- n_cline
env_m$p_cline <- p_cline
env_m$s_cline <- s_cline

# Clean dimnames
env_m <- lapply(env_m, function(el) {
  dimnames(el) <- NULL
  return(el)
})
str(env_m)
List of 38
 $ temperature.surf : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ temperature.meso : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ temperature.bathy: num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ salinity.surf    : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ salinity.meso    : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ salinity.bathy   : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ density.surf     : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ density.meso     : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ density.bathy    : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ oxygen.surf      : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ oxygen.meso      : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ oxygen.bathy     : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ aou.surf         : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ aou.meso         : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ aou.bathy        : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ silicate.surf    : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ silicate.meso    : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ silicate.bathy   : num [1:360, 1:180, 1:4] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
 $ phosphate.surf   : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ phosphate.meso   : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ phosphate.bathy  : num [1:360, 1:180, 1:4] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
 $ nitrate.surf     : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ nitrate.meso     : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ nitrate.bathy    : num [1:360, 1:180, 1:4] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
 $ bbp              : num [1:360, 1:180, 1:4] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
 $ npp              : num [1:360, 1:180, 1:4] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
 $ fmicro           : num [1:360, 1:180, 1:4] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
 $ log_chl          : num [1:360, 1:180, 1:4] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
 $ z_eu             : num [1:360, 1:180, 1:4] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
 $ pic              : num [1:360, 1:180, 1:4] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
 $ poc              : num [1:360, 1:180, 1:4] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
 $ psd              : num [1:360, 1:180, 1:4] NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ...
 $ mld              : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ thermo           : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ pycno            : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ n_cline          : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ p_cline          : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
 $ s_cline          : num [1:360, 1:180, 1:4] NA NA NA NA NA NA NA NA NA NA ...
# Same dim for everyone, perfect!

Convert to dataframe

# unroll each matrix
env_v <- lapply(env_m, function(e) {as.vector(e)})
# combine as columns
df_env_m <- do.call(cbind, env_v) %>% as.data.frame() %>% setNames(names(env_v))

# add coordinates (NB: shorter elements are recycled automatically)
df_env_m$lon <- lon
df_env_m$lat <- rep(lat, each = length(lon))
df_env_m$season <- rep(1:4, each = length(lon)*length(lat))


## Create a column to distinguish between layers
#df_env_m <- df_env_m %>% 
#  select(lon, lat, season, everything()) %>% 
#  pivot_longer(temperature.surf:nitrate.meso) %>% # initially one variable per layer, 
#  separate_wider_delim(name, into = c("variable", "layer"), sep = "\\.") %>% # get layer and variable
#  pivot_wider(names_from = "variable", values_from = "value") %>% # recreate columns from variables
#  select(lon, lat, season, layer, everything())

Then join with PAR data.

df_env_m <- df_env_m %>% left_join(df_par, by = join_by(lon, lat, season)) %>% select(lon, lat, season, everything()) %>% as_tibble()

Let’s do a quick plot to check everything is fine.

ggmap(df_env_m, "temperature.surf", type = "raster") + facet_wrap(~season)

Remove internal seas, lakes and land

# determine which points are in land
lons <- df_env_m$lon
lats <- df_env_m$lat
inland <- sp::point.in.polygon(lons, lats, coast$lon, coast$lat) == 1
ggplot(df_env_m) +
  geom_raster(aes(x = lon, y = lat, fill = inland)) +
  scale_fill_viridis_d() +
  scale_x_continuous(expand = c(0,0)) + scale_y_continuous(expand = c(0,0)) +
  coord_quickmap()

# remove South Pole
inland[lats < -85] <- TRUE

# remove Black Sea too
inland[between(lons, 20, 50) & between(lats, 41, 50)] <- TRUE

# remove Baltic Sea too
inland[between(lons, 12, 30) & between(lats, 52, 66)] <- TRUE

ggplot(df_env_m) +
  geom_raster(aes(x = lon, y = lat, fill = inland)) +
  scale_fill_viridis_d() +
  scale_x_continuous(expand = c(0,0)) + scale_y_continuous(expand = c(0,0)) +
  coord_quickmap()

# blankout points in land
df_env_m[inland, !names(df_env_m) %in% c("lon", "lat", "season")] <- NA
# NB: this works because `inland` gets automatically repeated for everymonth

# Convert to tibble and reorder columns
df_env_m <- df_env_m %>% as_tibble() %>% select(lon, lat, everything())

Plot a few maps without land to check.

ggmap(df_env_m, "density.surf", type = "raster", land = FALSE) + facet_wrap(~season)

ggmap(df_env_m, "npp", type = "raster", land = FALSE) + facet_wrap(~season)

ggmap(df_env_m, "mld", type = "raster", land = FALSE) + facet_wrap(~season)

Seems all good!

Compute annual climatology

df_env_ann <- df_env_m %>% 
  mutate(season = 0) %>% # to yearly
  group_by(lon, lat, season) %>% 
  summarise_all(mean, na.rm = TRUE) %>% 
  ungroup()

# Drop seasonal data for meso and bathy
df_env_m <- df_env_m %>% select(-contains(c("meso", "bathy")))

# Store annual with monthly data
df_env <- bind_rows(df_env_m, df_env_ann)

Do some sanity checks.

## seasonal data is only in surf #ok
#df_env %>% filter(season != 0) %>% select(-contains(c("meso", "bathy"))) %>% summary()
#df_env %>% filter(season != 0) %>% select(contains(c("meso", "bathy"))) %>% summary()
#
## annual data is in all 3 layers
#df_env %>% filter(season == 0) %>% summary()
#
## All ok!

Save environmental data

save(df_env, file = "data/00.all_env.Rdata")

Plot all maps

Annual

# Names of env variables
var_names <- df_env %>% select(-c(lon, lat, season)) %>% colnames()
plot_list <- list()
for (i in 1:length(var_names)){
  plot_list[[i]] <- ggmap(
    df_env %>% filter(season == 0), 
    var_names[i], 
    type = "raster"
  ) +
  ggtitle(var_names[i])
}
plot_list
[[1]]


[[2]]


[[3]]


[[4]]


[[5]]


[[6]]


[[7]]


[[8]]


[[9]]


[[10]]


[[11]]


[[12]]


[[13]]


[[14]]


[[15]]


[[16]]


[[17]]


[[18]]


[[19]]


[[20]]


[[21]]


[[22]]


[[23]]


[[24]]


[[25]]


[[26]]


[[27]]


[[28]]


[[29]]


[[30]]


[[31]]


[[32]]


[[33]]


[[34]]


[[35]]


[[36]]


[[37]]


[[38]]


[[39]]

Seasonal

Here we do not expect maps in the meso and bathypelagic.

plot_list <- list()
for (i in 1:length(var_names)){
  plot_list[[i]] <- ggmap(
    df_env %>% filter(season != 0), 
    var_names[i], 
    type = "raster"
  ) + 
  facet_wrap(~season) +
  ggtitle(var_names[i])
}
plot_list
[[1]]


[[2]]


[[3]]


[[4]]


[[5]]


[[6]]


[[7]]


[[8]]


[[9]]


[[10]]


[[11]]


[[12]]


[[13]]


[[14]]


[[15]]


[[16]]


[[17]]


[[18]]


[[19]]


[[20]]


[[21]]


[[22]]


[[23]]


[[24]]


[[25]]


[[26]]


[[27]]


[[28]]


[[29]]


[[30]]


[[31]]


[[32]]


[[33]]


[[34]]


[[35]]


[[36]]


[[37]]


[[38]]


[[39]]